# Copyright (c) 2019 Red Hat, Inc. All rights reserved. This copyrighted
# material is made available to anyone wishing to use, modify, copy, or
# redistribute it subject to the terms and conditions of the GNU General Public
# License v.2 or later.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Patch operations."""

import re
from urllib.parse import urlparse

from cki_lib.session import get_session

# Session used by load_from_location() to fetch patches from URL locations.
# NOTE(review): the string argument is presumably a name identifying this
# module to cki_lib (e.g. for logging) -- confirm against cki_lib.
SESSION = get_session('cki.kpet.patch')


class UnrecognizedFormat(Exception):
    """Raised when the content of a patch cannot be parsed."""


class UnrecognizedPathFormat(UnrecognizedFormat):
    """Raised when a path found in a patch's diff header is malformed."""


def _get_diff_file(diff_path):
    """
    Turn a path from a diff header into the path of the file it names.

    The special path "/dev/null" stands for a file which is absent on that
    side of the change and yields None. Any other path must consist of a
    non-empty top directory, a slash, and a remainder which doesn't end
    with a slash. The top directory is dropped from the result.

    Args:
        diff_path:  The file path from the diff header.
    Returns:
        The path with its top directory removed, or None for "/dev/null".
    Raises:
        UnrecognizedPathFormat: the path has no slash, or it starts or
                                ends with a slash.
    """
    if diff_path == "/dev/null":
        return None
    top_dir, slash, remainder = diff_path.partition("/")
    # An empty "slash" means there was no slash at all, an empty "top_dir"
    # means the path started with one
    if not slash or not top_dir or diff_path.endswith("/"):
        raise UnrecognizedPathFormat(diff_path)
    return remainder


def get_file_set(patch):
    """
    Get the set of paths to files modified by the patch.

    The paths are taken from the "diff" command line of each file's diff,
    if there is one, and from the "---"/"+++" line pair otherwise. The top
    directory (e.g. "a/" or "b/") is stripped from every path, and
    "/dev/null" paths are ignored. Anything between a line starting with
    "From " and the next line consisting of "---" (mail headers and the
    commit message) is skipped.

    Args:
        patch: Contents of the patch to extract modified paths from.
    Returns:
        The set of file paths modified by the patch. An empty set, if the
        patch is an empty string.
    Raises:
        UnrecognizedFormat: patch format was invalid: a header path was
                            malformed, a header had "/dev/null" on both
                            sides, or no changed files were found.
    """
    if patch == '':
        return set()
    # Except for the deliberately multi-line "From ... ---" alternative,
    # each header must match within its own line(s) only. That is why the
    # pattern uses "[^ \n]" and "[^\S\n]" (any whitespace but a newline)
    # rather than "[^ ]" and "\s", which would let a header swallow
    # unrelated neighbouring lines and produce bogus paths.
    pattern = re.compile(
        # Mail headers and commit message, up to the "---" separator
        r'^From [\s\S]*?'
        r'^---$'
        r'|'
        # "diff" command line, e.g. "diff --git a/file b/file"
        r'^diff +[^ \n]+ +(?P<diff_old>\S+) +(?P<diff_new>\S+)$'
        r'|'
        # "---"/"+++" line pair, each path optionally followed by
        # whitespace-separated text (e.g. a timestamp) on the same line
        r'^--- (?P<line_old>\S+)(?:[^\S\n].*)?\n'
        r'\+\+\+ (?P<line_new>\S+)(?:[^\S\n].*)?$',
        re.MULTILINE)
    file_set = set()
    # True from a "diff" line until the "---"/"+++" pair following it
    after_diff_line = False
    for match in pattern.finditer(patch):
        if match.group(0).startswith("From "):
            continue

        # If this is a "diff" line
        if match.group("diff_old") is not None:
            after_diff_line = True
            old = match.group("diff_old")
            new = match.group("diff_new")
        # Else this is a "---"/"+++" pair; if it follows a "diff" line
        elif after_diff_line:
            # The paths were already extracted from the "diff" line
            after_diff_line = False
            continue
        else:
            # This is a patch without "diff" lines
            old = match.group("line_old")
            new = match.group("line_new")
        try:
            old_file = _get_diff_file(old)
            new_file = _get_diff_file(new)
        except UnrecognizedPathFormat as exc:
            raise UnrecognizedFormat("Invalid path in a diff header"
                                     ) from exc
        # A change cannot have the file missing both before and after
        if not old_file and not new_file:
            raise UnrecognizedFormat("No valid paths in a diff header")
        if old_file:
            file_set.add(old_file)
        if new_file:
            file_set.add(new_file)

    if not file_set:
        raise UnrecognizedFormat("No changed files")
    return file_set


def load_from_location(location, cookies=None):
    """
    Read the content of a patch stored at a URL or in a local file.

    A location which parses as having a URL scheme is fetched with the
    module's session; anything else is opened as a local file path.

    Args:
        location:   Where the patch is: a URL or a file path.
        cookies:    A cookie jar object passed along with the request when
                    fetching a URL location. Not used for file paths.
    Returns:
        The patch content as a string.
    """
    is_url = bool(urlparse(location).scheme)
    if not is_url:
        # Some upstream patches are encoded as cp1252, iso-8859-1, or utf-8.
        # Reading as ASCII with "surrogateescape" is the method recommended
        # by http://python-notes.curiousefficiency.org/
        with open(location, encoding="ascii",
                  errors="surrogateescape") as patch_file:
            return patch_file.read()

    response = SESSION.get(location, cookies=cookies)
    # Let the response raise for a failed request instead of returning
    # its body as the patch
    response.raise_for_status()
    return response.text


def get_file_set_from_location_set(location_set, cookies=None):
    """
    Get the set of paths to files modified by patches at given locations.

    Load the patch from each location in turn, and collect the paths to
    files modified by any of them into a single set.

    Args:
        location_set:   The locations (URLs or paths) to load patches and
                        extract the modified files from. Doesn't have to
                        be a set, any iterable will do.
        cookies:        A cookie jar object to use when fetching URL
                        locations, if not None.
    Returns:
        The set of paths to modified files.
    Raises:
        UnrecognizedFormat: The format of a patch was invalid.
    """
    modified_files = set()
    for location in location_set:
        try:
            patch = load_from_location(location, cookies)
            modified_files.update(get_file_set(patch))
        except UnrecognizedFormat as exc:
            # Tell the caller which location had the offending patch
            message = f"Can't parse contents of {location}"
            raise UnrecognizedFormat(message) from exc
    return modified_files
